#### prepare packages and functions ####
require(Rcpp)
require(RcppArmadillo)
require(coda)
require(dichromat)

issue.var.name <- c("a.article9", "b.defense", "c.revisionism", "d.women", 
                    "e.gay", "f.immigrant", "g.growth", "h.tax")

color <- colorschemes$Categorical.12
plot.col <- c(color[12], color[12], color[12], 
              color[10], color[10], color[10], 
              color[2], color[2])

issue.label <- c("Revision of Article 9", "Increasing Defense Power", 
                 "Historical Revisionism", "Women's Empowerment", 
                 "Gay Marriage", "Accepting Foreign Workers", 
                 "Economic Growth", "Progressive Tax")

# function to estimate the finite mixture model of linear regressions
sourceCpp("finite_mixture_linear_regressions.cpp")

# function to deal with label switching
labeling <- function(mcmc.result) {
  MCMCsample <- list()
  for (i in 1:length(mcmc.result)) {
    MCMCsample[[i]] <- mcmc.result[[i]]$beta_r[1, , ]
  }
  l <- length(MCMCsample)
  g <- ncol(mcmc.result[[1]]$beta_r)
  label <- matrix(NA, l, g)
  for (i in 1:l) {
    intercept <- rowMeans(MCMCsample[[i]])
    for (j in 1:g) {
      label[i, j] <- which(c(g - rank(intercept) + 1) == j)
    }
  }
  label
}

# function to create mcmc.list objects from an Rcpp output
mcmc.list.create <- function(mcmc.result, parameter, label = NULL, group = NULL) {
  combined.list <- mcmc.list()
  if (parameter %in% c("beta_r", "beta_c")) {
    if (is.null(label)) {
      for (i in 1:length(mcmc.result)) {
        combined.list[[i]] <- mcmc(t(mcmc.result[[i]][[parameter]]))
      }
    } else {
      for (i in 1:length(mcmc.result)) {
        combined.list[[i]] <- mcmc(t(mcmc.result[[i]][[parameter]][, label[i, group], ]))
      }
    }
  } else {
    if (is.null(label)) {
      for (i in 1:length(mcmc.result)) {
        combined.list[[i]] <- mcmc(mcmc.result[[i]][[parameter]][, ])
      }
    } else {
      for (i in 1:length(mcmc.result)) {
        combined.list[[i]] <- mcmc(mcmc.result[[i]][[parameter]][, label[i, group]])
      }
    }
  }
  combined.list 
}

# function to compute the WAIC
WAIC <- function(loglik) {
  lppd <- sum(log(colMeans(exp(loglik))))
  p.WAIC2 <- sum(apply(loglik, 2, var))
  -2 * (lppd - p.WAIC2)
}

# function to summarize posterior distributions
posterior.summary <- function(x, CI = TRUE, prob = 0.95) {
  posterior.mean <- mean(x)
  if (CI == TRUE) {
    HPD.interval <- HPDinterval(as.mcmc(x), prob = prob)
    c(posterior.mean, HPD.interval)
  } else {
    posterior.mean
  }
}

#### load data ####
task.data <- read.csv("task_data.csv")

# reorder the levels of attribute variables
position.label <- c("Agree", "Neither", "Disagree")
task.data$a.article9 <- factor(task.data$a.article9, levels = position.label)
task.data$b.defense <- factor(task.data$b.defense, levels = position.label)
task.data$c.revisionism <- factor(task.data$c.revisionism, levels = position.label)
task.data$d.women <- factor(task.data$d.women, levels = position.label)
task.data$e.gay <- factor(task.data$e.gay, levels = position.label)
task.data$f.immigrant <- factor(task.data$f.immigrant, levels = position.label)
task.data$g.growth <- factor(task.data$g.growth, levels = position.label)
task.data$h.tax <- factor(task.data$h.tax, levels = position.label)

original.data <- subset(read.csv("ideological_label_conjoint_data.csv", 
                                 fileEncoding = "utf8"), 
                        Q5.2 < 53)
N <- nrow(original.data)  # number of respondents including satisficers

## prepare variables
# individual average of rating responses
average.rating <- rep(NA, N)
for (i in unique(task.data$respondent.id)) {
  average.rating[i] <- mean(task.data$rating[task.data$respondent.id == i])
}
task.data$average.rating <- average.rating[task.data$respondent.id]

# dependent variables
y.c <- task.data$choice  # choice-based
y.r <- task.data$rating - task.data$average.rating  # rating based

# independent variables
lm.result <- lm(rating ~ a.article9 + b.defense + c.revisionism + d.women + 
                  e.gay + f.immigrant + g.growth + h.tax, 
                data = task.data, x = TRUE)
x <- lm.result$x[, -1]

# rearrange respondent ids
id <- as.numeric(as.factor(task.data$respondent.id))

# number of respondents
l <- length(unique(id))

#### estimation ####
# CAUTION: It took over 15 hours to estimate the models in the authors'
# environment (using a high performance PC as of 2021).
# If you want to skip the estimation process, request Rdata files
# containing the MCMC results from the corresponding author.
for (i in 1:6) {
  g <- i
  FMR.result <- list()
  for (j in 1:3) {
    set.seed(1000 * j)
    FMR.result[[j]] <- FMR(y_r = y.r,  # rating-based outcomes
                           y_c = y.c,  # choice-based outcomes
                           x = x,  # independent variables
                           id = id,  # respondent ids
                           l = l,  # number of respondents
                           g = g,  # number of latent groups
                           b0_r = rep(0, ncol(x) + 1),  # prior means for rating coefficients
                           B0_r = diag(100 ^ 2, ncol(x) + 1),  # prior variance matrix for rating coefficients
                           b0_c = rep(0, ncol(x) + 1),  # prior means for choice coefficients
                           B0_c = diag(100 ^ 2, ncol(x) + 1),  # prior variance matrix for choice coefficients
                           v0_r = 0.01,  # prior degrees of freedom for the error variance of rating
                           s0_r = 1,  # prior scale for the error variance of rating
                           v0_c = 0.01,  # prior degrees of freedom for the error variance of choice
                           s0_c = 1,  # prior scale for the error variance of choice
                           w0 = rep(1, g),  # prior concentration parameter
                           burnin = 5000,  # number of burn-in iterations
                           iter = 10000,  # number of sampling iterations
                           thin = 10  # thinning interval
    )
  }
  print(paste0("Estimation of the ", i, "-group model has been finished at ", date(), "."))
  save(FMR.result, 
       file = paste0("FMR_result_", i, ".Rdata"))
  rm(FMR.result)
  gc(); gc()
}

# check convergence and compute the WAICs
for (i in 1:6) {
  load(paste0("FMR_result_", i, ".Rdata"))
  # relabel to deal with between-chain label switching
  if (i > 1) {
    label <- labeling(FMR.result)
  } else {
    label <- matrix(1, 3, 1)
  }
  print(paste0("Convergence check for the ", i, "-group model:"))
  for (j in 1:i) {
    print(sum(gelman.diag(mcmc.list.create(FMR.result, "beta_r", label, j), 
                          multivariate = FALSE)$psrf[, 1] > 1.1))
    print(sum(gelman.diag(mcmc.list.create(FMR.result, "beta_c", label, j), 
                          multivariate = FALSE)$psrf[, 1] > 1.1))
    print(sum(gelman.diag(mcmc.list.create(FMR.result, "sigma_r", label, j), 
                          multivariate = FALSE)$psrf[, 1] > 1.1))
    print(sum(gelman.diag(mcmc.list.create(FMR.result, "sigma_c", label, j), 
                          multivariate = FALSE)$psrf[, 1] > 1.1))
  }
  print(paste0("The ", i, "-group model's WAIC:"))
  print(round(WAIC(rbind(FMR.result[[1]]$loglik, 
                         FMR.result[[2]]$loglik, 
                         FMR.result[[3]]$loglik))))
  rm(FMR.result)
  gc(); gc()
}

#### post-processing ####
# adopt the four-group model
load("FMR_result_4.Rdata")

# relabel to deal with between-chain label switching
label <- labeling(FMR.result)

# reorder latent groups to make interpretations easier
beta_r.sample <- array(NA, c(17, 4, 30000))
for (i in 1:4) {
  beta_r.sample[, i, ] <- cbind(FMR.result[[1]]$beta_r[, label[1, i], ], 
                                FMR.result[[2]]$beta_r[, label[2, i], ], 
                                FMR.result[[3]]$beta_r[, label[3, i], ])
}
round(apply(beta_r.sample, c(1, 2), mean), 3)
group.order <- c(1, 4, 2, 3)  # reorder groups based on the coefficients of Article 9 in absolute value

## group assignment indices
S.sample <- array(NA, c(l, 4, 30000))
for (i in 1:4) {
  S.sample[, i, ] <- cbind(FMR.result[[1]]$S[, label[1, i], ], 
                           FMR.result[[2]]$S[, label[2, i], ], 
                           FMR.result[[3]]$S[, label[3, i], ])
}
S.sample <- S.sample[, group.order, ]

## obtain group-wise estimates of MMs by Monte Carlo marginalization
MM.posterior.sim.choice <- MM.posterior.sim.rating <- array(NA, c(24, 4, 30000))
for (i in 1:30000) {
  for (j in 1:4) {
    # detect observations assigned to group j in iteration i
    temp.data <- task.data[id %in% which(S.sample[, j, i] == 1), ]
    n.obs <- nrow(temp.data)
    # posterior draw of the MM for the three levels of attribute k
    for (k in 1:8) {
      temp.mean <- tapply(temp.data$choice, temp.data[, issue.var.name[k]], mean)
      temp.sd <- tapply(temp.data$choice, temp.data[, issue.var.name[k]], sd) / sqrt(n.obs)
      MM.posterior.sim.choice[(3 * (k - 1) + 1):(3 * (k - 1) + 3), j, i] <- 
        rnorm(3, temp.mean, temp.sd)
      temp.mean <- tapply(temp.data$rating, temp.data[, issue.var.name[k]], mean)
      temp.sd <- tapply(temp.data$rating, temp.data[, issue.var.name[k]], sd) / sqrt(n.obs)
      MM.posterior.sim.rating[(3 * (k - 1) + 1):(3 * (k - 1) + 3), j, i] <- 
        rnorm(3, temp.mean, temp.sd)
    }
  }
}

# Figure 3(a)
MM.posterior.summary.choice <- apply(MM.posterior.sim.choice, 1:2, posterior.summary)

png("Figure_3a.png", width = 6.2, height = 4.4, units = "in", pointsize = 8, 
    res = 2000, type = "cairo")
par(mar = c(2, 0, 1, 0))
plot(NULL, NULL, type = "n", bty = "n", xlim = c(-0.97, 3.05), ylim = c(0, 33), 
     xlab = "", ylab = "", xaxt = "n", yaxt = "n")
for (i in 1:4) {
  segments(seq(0.2 + 0.75 * (i - 1), 0.8 + 0.75 * (i - 1), 0.15), 0, 
           seq(0.2 + 0.75 * (i - 1), 0.8 + 0.75 * (i - 1), 0.15), 33, lwd = 0.4, col = "gray")
  for (j in 1:8) {
    segments(0.17 + 0.75 * (i - 1), 35 - 4 * j, 
             0.83 + 0.75 * (i - 1), 35 - 4 * j, lty = 3, col = "gray")
    segments(0.17 + 0.75 * (i - 1), 34 - 4 * j, 
             0.83 + 0.75 * (i - 1), 34 - 4 * j, lty = 3, col = "gray")
    segments(0.17 + 0.75 * (i - 1), 33 - 4 * j, 
             0.83 + 0.75 * (i - 1), 33 - 4 * j, lty = 3, col = "gray")
    points(MM.posterior.summary.choice[1, (3 * (j - 1) + 1):(3 * j), i] + 0.75 * (i - 1), 
           c(35 - 4 * j, 34 - 4 * j, 33 - 4 * j), 
           pch = 19, col = plot.col[j])
    segments(MM.posterior.summary.choice[2, (3 * (j - 1) + 1):(3 * j), i] + 0.75 * (i - 1), 
             c(35 - 4 * j, 34 - 4 * j, 33 - 4 * j), 
             MM.posterior.summary.choice[3, (3 * (j - 1) + 1):(3 * j), i] + 0.75 * (i - 1), 
             c(35 - 4 * j, 34 - 4 * j, 33 - 4 * j), col = plot.col[j])
  }
  axis(1, at = seq(0.2 + 0.75 * (i - 1), 0.8 + 0.75 * (i - 1), 0.15), 
       labels = c("0.2", "", "0.5", "", "0.8"))
}
for (i in 1:8) {
  text(-0.97, 36 - 4 * i, labels = issue.label[i], pos = 4)
  text(-0.31, 35 - 4 * i, labels = "Agree", cex = 0.9, pos = 4)
  text(-0.31, 34 - 4 * i, labels = "Neither", cex = 0.9, pos = 4)
  text(-0.31, 33 - 4 * i, labels = "Disagree", cex = 0.9, pos = 4)
}
mtext(c("Group 1", "Group 2", "Group 3", "Group 4"), 
      at = seq(0.5, 2.75, 0.75), line = -0.5, cex = 1.2)
dev.off()

# Figure 3(b)
MM.posterior.summary.rating <- apply(MM.posterior.sim.rating, 1:2, posterior.summary)

png("Figure_3b.png", width = 6.2, height = 4.4, units = "in", pointsize = 8, 
    res = 2000, type = "cairo")
par(mar = c(2, 0, 1, 0))
plot(NULL, NULL, type = "n", bty = "n", xlim = c(0.6, 14), ylim = c(0, 33), 
     xlab = "", ylab = "", xaxt = "n", yaxt = "n")
for (i in 1:4) {
  segments(seq(4.5 + 2.5 * (i - 1), 6.5 + 2.5 * (i - 1), 0.5), 0, 
           seq(4.5 + 2.5 * (i - 1), 6.5 + 2.5 * (i - 1), 0.5), 33, lwd = 0.4, col = "gray")
  for (j in 1:8) {
    segments(4.4 + 2.5 * (i - 1), 35 - 4 * j, 
             6.6 + 2.5 * (i - 1), 35 - 4 * j, lty = 3, col = "gray")
    segments(4.4 + 2.5 * (i - 1), 34 - 4 * j, 
             6.6 + 2.5 * (i - 1), 34 - 4 * j, lty = 3, col = "gray")
    segments(4.4 + 2.5 * (i - 1), 33 - 4 * j, 
             6.6 + 2.5 * (i - 1), 33 - 4 * j, lty = 3, col = "gray")
    points(MM.posterior.summary.rating[1, (3 * (j - 1) + 1):(3 * j), i] + 2.5 * (i - 1), 
           c(35 - 4 * j, 34 - 4 * j, 33 - 4 * j), 
           pch = 19, col = plot.col[j])
    segments(MM.posterior.summary.rating[2, (3 * (j - 1) + 1):(3 * j), i] + 2.5 * (i - 1), 
             c(35 - 4 * j, 34 - 4 * j, 33 - 4 * j), 
             MM.posterior.summary.rating[3, (3 * (j - 1) + 1):(3 * j), i] + 2.5 * (i - 1), 
             c(35 - 4 * j, 34 - 4 * j, 33 - 4 * j), col = plot.col[j])
  }
  axis(1, at = seq(4.5 + 2.5 * (i - 1), 6.5 + 2.5 * (i - 1), 0.5), 
       labels = c("4.5", "", "5.5", "", "6.5"))
}
for (i in 1:8) {
  text(0.6, 36 - 4 * i, labels = issue.label[i], pos = 4)
  text(2.8, 35 - 4 * i, labels = "Agree", cex = 0.9, pos = 4)
  text(2.8, 34 - 4 * i, labels = "Neither", cex = 0.9, pos = 4)
  text(2.8, 33 - 4 * i, labels = "Disagree", cex = 0.9, pos = 4)
}
mtext(c("Group 1", "Group 2", "Group 3", "Group 4"), 
      at = seq(5.5, 13, 2.5), line = -0.5, cex = 1.2)
dev.off()

## estimation results of parameters other than coefficients (Table A.7)
# error variance for choice-based outcomes
sigma_c.sample <- matrix(NA, 30000, 4)
for (i in 1:4) {
  sigma_c.sample[, i] <- rbind(FMR.result[[1]]$sigma_c[, label[1, i]], 
                               FMR.result[[2]]$sigma_c[, label[2, i]], 
                               FMR.result[[3]]$sigma_c[, label[3, i]])
}
sigma_c.sample <- sigma_c.sample[, group.order]
sigma_c.summary <- apply(sigma_c.sample, 2, posterior.summary, CI = TRUE)
round(sigma_c.summary, 2)

# error variance for rating-based outcomes
sigma_r.sample <- matrix(NA, 30000, 4)
for (i in 1:4) {
  sigma_r.sample[, i] <- rbind(FMR.result[[1]]$sigma_r[, label[1, i]], 
                               FMR.result[[2]]$sigma_r[, label[2, i]], 
                               FMR.result[[3]]$sigma_r[, label[3, i]])
}
sigma_r.sample <- sigma_r.sample[, group.order]
sigma_r.summary <- apply(sigma_r.sample, 2, posterior.summary, CI = TRUE)
round(sigma_r.summary, 2)

# weight parameters
eta.sample <- matrix(NA, 30000, 4)
for (i in 1:4) {
  eta.sample[, i] <- rbind(FMR.result[[1]]$eta[, label[1, i]], 
                           FMR.result[[2]]$eta[, label[2, i]], 
                           FMR.result[[3]]$eta[, label[3, i]])
}
eta.sample <- eta.sample[, group.order]
eta.summary <- apply(eta.sample, 2, posterior.summary, CI = TRUE)
round(eta.summary, 2)

# proportion of latent groups
round(apply(colMeans(S.sample), 1, posterior.summary), 2)